# Merging with AP3 model 
library(pacman)
p_load(
  here, data.table, fst, readxl, stringr, collapse
)

# Goal: Calculate marginal damages for each power plant 
# 1) Load emissions rates for each plant (so2, co2, nox, pm2.5)
# 2) Merge plants to marginal damages per ton of pollutant (using fips and stack height)
SOCIAL_COST_OF_CARBON = 51*0.7993 # 51 in 2022 dollars to 2014
# 3) Multiply emissions rates by marginal damages

# First loading unit info table 
unit_info_dt = 
  read.fst(
    here("Data/electricity-generation/unit-info-dt.fst"),
    as.data.table = TRUE
  )

# Now the emissions rates 
emissions_rate_dt = 
  merge(
    read.fst(
      here('Data/electricity-generation/unit-model-fit/emissions-rate-dt.fst'),
      as.data.table = TRUE
    ),
    unit_info_dt,
    by = c('orispl_code', 'unitid')
  )[,.(
    orispl_code, unitid, 
    fips, 
    pollutant, 
    # For now we only have low/medium damages.
    stack_height = fcase(
      stack_height < 250, 'low',
      stack_height >= 250, 'medium'#, & stack_height <= 500,
      #stack_height > 500, 'tall'
    ),
    output, 
    gload_mwh_split, 
    tot_gload,
    # Converting emissions rates to tons/mwh, so2+nox in lbs
    emissions_rate = fcase(
      pollutant %in% c('so2','nox'), emissions_rate/2000,
      pollutant == 'co2', emissions_rate
    )
  )]

# Reading in pm2.5 emissions rates 
pm25_emissions_dt = fread(
  here('Data/electricity-generation/output/emissions-rate-avg.csv')
)

# Adding PM2.5 to rest of pollutants 
emissions_rate_dt = 
  rbind(
    emissions_rate_dt[pollutant != 'pm25'],
    merge(
      emissions_rate_dt[,.(
        orispl_code, unitid, 
        fips, stack_height, 
        output, gload_mwh_split,
        tot_gload
      )] |>unique(),
      pm25_emissions_dt[,.(
        orispl_code, unitid, pollutant = 'pm25',
        emissions_rate = pm25_tons_per_mw
      )],
      by = c('orispl_code','unitid')
    ),
    use.names = TRUE
  )

# Now for marginal damages
fips_dt = read_xlsx(
  path = here('Data/Marginal Damages (2011) from Holland, Mansur, Muller, Yates AER forthcoming.xlsx'),
  sheet = 'ground-level MD per ton'
  ) %>%  
  data.table() %>% 
  .[,.( # Miami-Dade changed their fips code
    fips = fcase(
      fips == '12025', '12086',
      fips != '12025', str_pad(fips, 5,'left','0')
    )
  )]

# TODO: Figure out county list for tall stack heights
md_dt = 
  rbind(
    cbind(
      fips_dt,
      fread(here("Data/AP3/JointFolder/md_L_2014_CS.csv"))[,stack_height := 'low']
    ),
    cbind(
      fips_dt,
      fread(here("Data/AP3/JointFolder/md_M_2014_CS.csv"))[,stack_height := 'medium']
    )
  ) |> 
  setnames(new = c('fips','nh3','nox','pm25','so2','voc','stack_height')) |>
  melt(
    id.vars = c('fips','stack_height'),
    variable.name = 'pollutant',
    value.name = 'marginal_damage'
  )

# Merging emissions data
emissions_md_dt = 
  merge(
    emissions_rate_dt, 
    md_dt,
    by = c('fips','stack_height','pollutant'),
    all.x = TRUE
  )

# Adding social cost of carbon 
emissions_md_dt[,marginal_damage := fcase(
  pollutant == 'co2', SOCIAL_COST_OF_CARBON,
  pollutant != 'co2', marginal_damage
)]

# Calculating damages per mwh of electricity produced
emissions_md_dt[,md_per_mwh := emissions_rate*marginal_damage]

# Casting into wide format
emissions_md_dt_wide = 
  dcast(
    emissions_md_dt,
    fips + orispl_code + unitid + stack_height + gload_mwh_split ~ output + pollutant,
    value.var = 'md_per_mwh'
  )
setnames(
  emissions_md_dt_wide,
  new = colnames(emissions_md_dt_wide) |> str_replace('emissions_rate','md_per_mwh')
)


# Aggregating over all of the pollutants
tot_md_dt_wide = emissions_md_dt[,.(
  damages_per_mwh = sum(md_per_mwh, na.rm =TRUE)),
  keyby = .(fips, orispl_code, unitid, output, stack_height, gload_mwh_split)
] 

tot_md_dt =
  dcast(
    tot_md_dt_wide,
    orispl_code + unitid ~ output,
    value.var = 'damages_per_mwh'
  ) |>
  setnames(
    old = c('emissions_rate_high','emissions_rate_low'),
    new = c('md_per_mwh_high','md_per_mwh_low')
  ) |>
  merge(
    emissions_md_dt_wide,
    by = c('orispl_code','unitid')
  )

# Saving the results
write.csv(
  tot_md_dt, 
  here('Data/electricity-generation/output/damage-per-mwh.csv')
)

tot_md_dt = 
fread(
  here('Data/electricity-generation/output/damage-per-mwh.csv')
)

p_load(ggplot2, scales, haven)

damages_hist = 
  ggplot(tot_md_dt_wide) +
  geom_histogram(
    aes(x = damages_per_mwh/1000, fill = output), 
    alpha= 0.5, 
    bins = 100,
    position = 'Identity'
  ) +
  scale_x_log10(labels = label_dollar(accuracy = 0.001)) + #labels = scales::label_dollar()
  theme_minimal() + 
  labs(
    x = 'Marginal Damage per KWh (Log scale)',  
    y = 'Number of Electricity Generating Units'
  )+   
  scale_fill_brewer(
    name = 'Elec Production',
    labels = c('High','Low'),
    palette = 'Set1'
  ) +
  facet_grid(
    rows = vars(stringr::str_to_title(paste(stack_height,'stacks'))), scales = 'free') + 
  theme(legend.position = 'bottom')
ggsave(
  plot = damages_hist,
  filename = 'figures/electricity-generation/damages-hist.jpeg',
  width = 6, height = 5,
  bg = 'white'
)  



